{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "29e4b50f",
   "metadata": {},
   "source": [
    "# a) Análise Exploratória dos Dados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6769b8e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Load the raw dataset from the CSV file into a DataFrame.\n",
    "df = pd.read_csv('enape_raw_dataset.csv')\n",
    "\n",
    "# Text overview, printed in order: first rows, summary statistics,\n",
    "# and the number of missing values per column.\n",
    "for overview in (df.head(), df.describe(), df.isnull().sum()):\n",
    "    print(overview)\n",
    "\n",
    "# Count plot of PC3_6 (used in this notebook as the dropout indicator).\n",
    "# countplot returns the Axes it drew on, so labels are set on that object.\n",
    "ax = sns.countplot(data=df, x='PC3_6')\n",
    "ax.set_title('Distribuição de Evasão (PC3_6)')\n",
    "ax.set_xlabel('Evadido (1 = Sim, 0 = Não)')\n",
    "ax.set_ylabel('Contagem')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05884fe9",
   "metadata": {},
   "source": [
    "# b) Preparação e Engenharia de Atributos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f6ee799",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Keep only rows whose PC3_6 value is a valid binary label (0 or 1).\n",
    "# .copy() detaches the filtered frame so the assignments below write to an\n",
    "# independent DataFrame instead of a slice (avoids SettingWithCopyWarning).\n",
    "df = df[df['PC3_6'].isin([0, 1])].copy()\n",
    "df['evadiu'] = df['PC3_6'].astype(int)\n",
    "\n",
    "# Columns that carry the label; they must never be imputed or used as features.\n",
    "label_cols = ['PC3_6', 'evadiu']\n",
    "\n",
    "# Impute missing values of every numeric feature with that column's mean.\n",
    "# 'number' covers all numeric dtypes, not only float64/int64.\n",
    "for col in df.select_dtypes(include='number').columns.difference(label_cols):\n",
    "    df[col] = df[col].fillna(df[col].mean())\n",
    "\n",
    "# One-hot encode the categorical columns (first level dropped per column).\n",
    "df = pd.get_dummies(df, drop_first=True)\n",
    "\n",
    "# Split features and target. PC3_6 is dropped together with 'evadiu' because\n",
    "# it is an exact copy of the target; keeping it in X would leak the label.\n",
    "X = df.drop(columns=label_cols)\n",
    "y = df['evadiu']\n",
    "\n",
    "# Standardise the features; X becomes a NumPy array from here on.\n",
    "# NOTE(review): the scaler is fitted on the full dataset before the train/test\n",
    "# split, so test-set statistics influence the scaling - consider fitting on\n",
    "# the training portion only.\n",
    "scaler = StandardScaler()\n",
    "X = scaler.fit_transform(X)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "46168b0c",
   "metadata": {},
   "source": [
    "# c) Implementação do Modelo (PyTorch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcbe05c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "class RegressaoLogistica(nn.Module):\n",
    "    def __init__(self, input_dim):\n",
    "        super(RegressaoLogistica, self).__init__()\n",
    "        self.linear = nn.Linear(input_dim, 1)\n",
    "    \n",
    "    def forward(self, x):\n",
    "        return torch.sigmoid(self.linear(x))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf09232b",
   "metadata": {},
   "source": [
    "# d) Treinamento do Modelo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "269476fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Tunable settings for the split and the training loop.\n",
    "SEED = 42\n",
    "TEST_SIZE = 0.3\n",
    "N_EPOCHS = 100\n",
    "LEARNING_RATE = 0.01\n",
    "LOG_EVERY = 10\n",
    "\n",
    "# Seed PyTorch so the weight initialisation (and therefore the whole run)\n",
    "# is reproducible; the split below is already seeded via random_state.\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "\n",
    "# Convert to float32 tensors; targets are reshaped to (n, 1) to match the\n",
    "# model output shape expected by BCELoss.\n",
    "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
    "y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)\n",
    "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
    "y_test_tensor = torch.tensor(y_test.values.reshape(-1, 1), dtype=torch.float32)\n",
    "\n",
    "# Model, loss and optimiser.\n",
    "model = RegressaoLogistica(X.shape[1])\n",
    "criterion = nn.BCELoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)\n",
    "\n",
    "# Full-batch training: every epoch is one gradient step on the whole training set.\n",
    "model.train()\n",
    "for epoch in range(N_EPOCHS):\n",
    "    outputs = model(X_train_tensor)\n",
    "    loss = criterion(outputs, y_train_tensor)\n",
    "    optimizer.zero_grad()\n",
    "    loss.backward()\n",
    "    optimizer.step()\n",
    "\n",
    "    if epoch % LOG_EVERY == 0:\n",
    "        print(f'Época [{epoch}/{N_EPOCHS}], Loss: {loss.item():.4f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "90a00bd5",
   "metadata": {},
   "source": [
    "# e) Avaliação do Modelo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "270617e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# Run the trained model on the test set without tracking gradients and\n",
    "# threshold the predicted probabilities at 0.5 to obtain class labels.\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    y_pred = model(X_test_tensor)\n",
    "    y_pred_class = (y_pred >= 0.5).int()\n",
    "\n",
    "# Flatten the (n, 1) tensors into 1-D integer NumPy arrays so that true and\n",
    "# predicted labels share one dtype and are reported as 0 / 1.\n",
    "y_true_np = y_test_tensor.numpy().ravel().astype(int)\n",
    "y_pred_np = y_pred_class.numpy().ravel()\n",
    "\n",
    "print(\"Matriz de Confusão:\")\n",
    "print(confusion_matrix(y_true_np, y_pred_np))\n",
    "# The newline escape must be written as \\\\n inside the notebook JSON; a raw\n",
    "# line break inside the string literal is a SyntaxError.\n",
    "print(\"\\nRelatório de Classificação:\")\n",
    "print(classification_report(y_true_np, y_pred_np))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdb2766c",
   "metadata": {},
   "source": [
    "# f) Documentação dos Resultados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fdb3b37",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Bar chart of the per-class F1-score on the test set.\n",
    "import numpy as np\n",
    "\n",
    "# Flatten to 1-D integer arrays so the report is keyed by '0' and '1'\n",
    "# (float labels would be keyed as '0.0' / '1.0').\n",
    "y_true_np = np.asarray(y_test_tensor).ravel().astype(int)\n",
    "y_pred_np = np.asarray(y_pred_class).ravel().astype(int)\n",
    "\n",
    "# labels=[0, 1] guarantees both keys exist even if one class is absent from\n",
    "# the test split; zero_division=0 reports 0 instead of warning in that case.\n",
    "report = classification_report(\n",
    "    y_true_np, y_pred_np, labels=[0, 1], output_dict=True, zero_division=0\n",
    ")\n",
    "classes = ['Não Evadiu', 'Evadiu']\n",
    "scores = [report['0']['f1-score'], report['1']['f1-score']]\n",
    "\n",
    "plt.bar(classes, scores, color=['blue', 'red'])\n",
    "plt.title('F1-score por classe')\n",
    "plt.ylabel('F1-score')\n",
    "plt.ylim(0, 1)\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
